Fix use of the phys_to_machine_mapping table in Linux 2.4 and 2.6.
We now ensure that the table contains no MFNs that do not belong to
the OS --- invalid entries contain a sentinel value; deliberate
foreign mappings have the high bit set. This means that pte_page() and
pte_pfn() will do the right thing despite possible aliasing in the
M2P table.
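
For illustration, a minimal standalone sketch of the round-trip check that
the patched pte_page()/pte_pfn() rely on: an MFN is accepted only if
p2m(m2p(MFN)) == MFN, which both the sentinel and the foreign-frame high
bit deliberately break. (The toy table sizes, test values, and the helper
name pte_mfn_to_pfn are invented for this sketch; the patch open-codes the
check in the macros below.)

#include <assert.h>
#include <stdio.h>

#define MAX_PFN           4UL   /* toy sizes for illustration only */
#define MAX_MFN           8UL
#define INVALID_P2M_ENTRY (~0UL)
#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))

/* Toy stand-ins for the kernel tables. machine_to_phys_mapping is
 * maintained by Xen and shared by all domains, so its entries can
 * alias PFNs in our own reservation. */
static unsigned long p2m[MAX_PFN];   /* phys_to_machine_mapping */
static unsigned long m2p[MAX_MFN];   /* machine_to_phys_mapping */

/* The round-trip test at the heart of the patched pte_pfn()/pte_page(). */
static unsigned long pte_mfn_to_pfn(unsigned long mfn)
{
    unsigned long pfn = (mfn < MAX_MFN) ? m2p[mfn] : MAX_PFN;
    if ( (pfn >= MAX_PFN) || (p2m[pfn] != mfn) )
        pfn = MAX_PFN;               /* special: force !pfn_valid() */
    return pfn;
}

int main(void)
{
    p2m[0] = 5; m2p[5] = 0;                  /* PFN 0 is ours, MFN 5     */
    p2m[1] = FOREIGN_FRAME(6); m2p[6] = 1;   /* PFN 1 maps foreign MFN 6,
                                                whose m2p entry aliases
                                                one of our PFNs          */
    p2m[2] = INVALID_P2M_ENTRY; m2p[7] = 2;  /* PFN 2 ballooned out      */

    assert(pte_mfn_to_pfn(5) == 0);          /* ours: round trip holds   */
    assert(pte_mfn_to_pfn(6) == MAX_PFN);    /* foreign alias: rejected  */
    assert(pte_mfn_to_pfn(7) == MAX_PFN);    /* sentinel: rejected       */
    printf("round-trip checks behave as expected\n");
    return 0;
}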
} user_balloon_op_t;
/* END OF USER DEFINE */
-/* Dead entry written into balloon-owned entries in the PMT. */
-#define DEAD 0xdeadbeef
-
static struct proc_dir_entry *balloon_pde;
unsigned long credit;
static unsigned long current_pages, most_seen_pages;
+/*
+ * Dead entry written into balloon-owned entries in the PMT.
+ * It is deliberately different to INVALID_P2M_ENTRY.
+ */
+#define DEAD 0xdead1234
+
static inline pte_t *get_ptep(unsigned long addr)
{
pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
for ( i = 0; i < num_pages; i++, currp++ )
{
struct page *page = alloc_page(GFP_HIGHUSER);
- unsigned long pfn = page - mem_map;
+ unsigned long pfn = page - mem_map;
/* If allocation fails then free all reserved pages. */
- if ( page == 0 )
+ if ( page == NULL )
{
- printk(KERN_ERR "Unable to inflate balloon by %ld, only %ld pages free.",
- num_pages, i);
+ printk(KERN_ERR "Unable to inflate balloon by %ld, only"
+ " %ld pages free.\n", num_pages, i);
currp = parray;
- for(j = 0; j < i; j++, ++currp){
+ for ( j = 0; j < i; j++, currp++ )
__free_page((struct page *) (mem_map + *currp));
- }
ret = -EFAULT;
goto cleanup;
}
{
unsigned long mfn = phys_to_machine_mapping[*currp];
curraddr = (unsigned long)page_address(mem_map + *currp);
- if (curraddr)
+ if ( curraddr != 0 )
queue_l1_entry_update(get_ptep(curraddr), 0);
-
phys_to_machine_mapping[*currp] = DEAD;
*currp = mfn;
}
XEN_flush_page_update_queue();
new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
parray, num_pages, 0);
- if (new_page_cnt != num_pages)
+ if ( new_page_cnt != num_pages )
{
printk(KERN_WARNING
"claim_new_pages: xen granted only %lu of %lu requested pages\n",
new_page_cnt, num_pages);
- /* XXX
- * avoid xen lockup when user forgot to setdomainmaxmem. xen
- * usually can dribble out a few pages and then hangs
+ /*
+ * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
+ * usually can dribble out a few pages and then hangs.
*/
- if (new_page_cnt < 1000) {
+ if ( new_page_cnt < 1000 )
+ {
printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
parray, new_page_cnt, 0);
}
}
memcpy(phys_to_machine_mapping+most_seen_pages, parray,
- new_page_cnt * sizeof(unsigned long));
+ new_page_cnt * sizeof(unsigned long));
pagetable_extend(most_seen_pages,new_page_cnt);
/*
* make a new phys map if mem= says xen can give us memory to grow
*/
- if (max_pfn > start_info.nr_pages) {
+ if ( max_pfn > start_info.nr_pages )
+ {
extern unsigned long *phys_to_machine_mapping;
unsigned long *newmap;
newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
- phys_to_machine_mapping = memcpy(newmap, phys_to_machine_mapping,
- start_info.nr_pages * sizeof(unsigned long));
+ memset(newmap, ~0, max_pfn * sizeof(unsigned long));
+ memcpy(newmap, phys_to_machine_mapping,
+ start_info.nr_pages * sizeof(unsigned long));
+ phys_to_machine_mapping = newmap;
}
return 0;
* then we'll have p2m(m2p(MFN))==MFN.
* If we detect a special mapping then it doesn't have a 'struct page'.
* We force !VALID_PAGE() by returning an out-of-range pointer.
+ *
+ * NB. These checks require that, for any MFN that is not in our reservation,
+ * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
+ * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
+ * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
+ *
+ * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
+ * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
+ * require. In all the cases we care about, the high bit gets shifted out
+ * (e.g., phys_to_machine()) so behaviour there is correct.
*/
+#define INVALID_P2M_ENTRY (~0UL)
+#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
#define pte_page(_pte) \
({ \
unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \
pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
pfn = pte->pte_low >> PAGE_SHIFT;
queue_l1_entry_update(pte, 0);
+ phys_to_machine_mapping[(__pa(ret)>>PAGE_SHIFT)+i] =
+ INVALID_P2M_ENTRY;
flush_page_update_queue();
if (HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
&pfn, 1, 0) != 1) BUG();
pfn+i, (__pa(ret)>>PAGE_SHIFT)+i);
phys_to_machine_mapping[(__pa(ret)>>PAGE_SHIFT)+i] =
pfn+i;
- flush_page_update_queue();
}
flush_page_update_queue();
}
pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
pfn_array[i] = pte->pte_low >> PAGE_SHIFT;
queue_l1_entry_update(pte, 0);
- phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = 0xdeadbeef;
+ phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = INVALID_P2M_ENTRY;
}
flush_page_update_queue();
mcl[i].args[3] = blkif->domid;
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- phys_seg[i].buffer >> PAGE_SHIFT;
+ FOREIGN_FRAME(phys_seg[i].buffer >> PAGE_SHIFT);
}
if ( unlikely(HYPERVISOR_multicall(mcl, nr_psegs) != 0) )
/******************************************************************************
- * block.c
+ * blkfront.c
*
* XenLinux virtual block-device driver.
*
{
unsigned long free = rec_ring_free;
- if(free>BLKIF_RING_SIZE) BUG();
+ if ( free > BLKIF_RING_SIZE )
+ BUG();
rec_ring_free = rec_ring[free].id;
- rec_ring[free].id = 0x0fffffee; // debug
+ rec_ring[free].id = 0x0fffffee; /* debug */
return free;
}
id = GET_ID_FROM_FREELIST();
rec_ring[id].id = (unsigned long) req;
-//printk(KERN_ALERT"r: %d req %p (%ld)\n",req_prod,req,id);
-
ring_req->id = id;
ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
BLKIF_OP_READ;
DPRINTK("Entered do_blkif_request\n");
-//printk(KERN_ALERT"r: %d req\n",req_prod);
-
queued = 0;
while ((req = elv_next_request(rq)) != NULL) {
continue;
}
- if (BLKIF_RING_FULL) {
+ if ( BLKIF_RING_FULL )
+ {
blk_stop_queue(rq);
break;
}
id = bret->id;
req = (struct request *)rec_ring[id].id;
-//printk(KERN_ALERT"i: %d req %p (%ld)\n",i,req,id);
-
blkif_completion( &rec_ring[id] );
- ADD_ID_TO_FREELIST(id); // overwrites req
+ ADD_ID_TO_FREELIST(id); /* overwrites req */
switch ( bret->operation )
{
req->nr_segments = 1;
req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
-//printk("N: %d req %p (%ld)\n",req_prod,rec_ring[xid].id,xid);
-
req_prod++;
/* Keep a private copy so we can reissue requests when recovering. */
id = bret->id;
bh = (struct buffer_head *)rec_ring[id].id;
-//printk("i: %d req %p (%ld)\n",i,bh,id);
-
blkif_completion( &rec_ring[id] );
ADD_ID_TO_FREELIST(id);
xreq->operation = req->operation;
xreq->nr_segments = req->nr_segments;
xreq->device = req->device;
- // preserve id
+ /* preserve id */
xreq->sector_number = req->sector_number;
for ( i = 0; i < req->nr_segments; i++ )
- {
- xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) |
- (machine_to_phys_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] <<
- PAGE_SHIFT);
- }
-
+ xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
}
static inline void translate_req_to_mfn(blkif_request_t *xreq,
xreq->operation = req->operation;
xreq->nr_segments = req->nr_segments;
xreq->device = req->device;
- xreq->id = req->id; // copy id (unlike above)
+ xreq->id = req->id; /* copy id (unlike above) */
xreq->sector_number = req->sector_number;
for ( i = 0; i < req->nr_segments; i++ )
- {
- xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) |
- (phys_to_machine_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] <<
- PAGE_SHIFT);
- }
+ xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
}
static inline void flush_requests(void)
{
DISABLE_SCATTERGATHER();
-//printk(KERN_ALERT"flush %d\n",req_prod);
wmb(); /* Ensure that the frontend can see the requests. */
blk_ring->req_prod = req_prod;
notify_via_evtchn(blkif_evtchn);
blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;
rec_ring[id].id = (unsigned long) req;
-//printk("c: %d req %p (%ld)\n",req_prod,req,id);
-
translate_req_to_pfn( &rec_ring[id], req );
req_prod++;
" in state %d\n", blkif_state);
break;
}
+
blkif_evtchn = status->evtchn;
- blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
- if ( (rc=request_irq(blkif_irq, blkif_int,
- SA_SAMPLE_RANDOM, "blkif", NULL)) )
- {
+ blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+
+ if ( (rc = request_irq(blkif_irq, blkif_int,
+ SA_SAMPLE_RANDOM, "blkif", NULL)) )
printk(KERN_ALERT"blkfront request_irq failed (%ld)\n",rc);
- }
if ( recovery )
{
/* Hmm, requests might be re-ordered when we re-issue them.
This will need to be fixed once we have barriers */
- // req_prod = 0; : already is zero
-
- // stage 1 : find active and move to safety
- for ( i=0; i <BLKIF_RING_SIZE; i++ )
+ /* Stage 1 : Find active and move to safety. */
+ for ( i = 0; i < BLKIF_RING_SIZE; i++ )
{
if ( rec_ring[i].id >= PAGE_OFFSET )
{
translate_req_to_mfn(
- &blk_ring->ring[req_prod].req, &rec_ring[i] );
-
+ &blk_ring->ring[req_prod].req, &rec_ring[i]);
req_prod++;
}
}
-printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
+ printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
- // stage 2 : set up shadow list
- for ( i=0; i<req_prod; i++ )
+ /* Stage 2 : Set up shadow list. */
+ for ( i = 0; i < req_prod; i++ )
{
rec_ring[i].id = blk_ring->ring[i].req.id;
blk_ring->ring[i].req.id = i;
- translate_req_to_pfn( &rec_ring[i], &blk_ring->ring[i].req );
+ translate_req_to_pfn(&rec_ring[i], &blk_ring->ring[i].req);
}
- // stage 3 : set up free list
+ /* Stage 3 : Set up free list. */
for ( ; i < BLKIF_RING_SIZE; i++ )
rec_ring[i].id = i+1;
rec_ring_free = req_prod;
/* Kicks things back into life. */
flush_requests();
-
-
-
}
else
{
/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
-void blkif_completion( blkif_request_t *req )
+void blkif_completion(blkif_request_t *req)
{
int i;
{
unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
unsigned long mfn = phys_to_machine_mapping[pfn];
-
queue_machphys_update(mfn, pfn);
}
-
break;
}
mdata = virt_to_machine(vdata);
new_mfn = get_new_mfn();
+ /*
+ * Set the new P2M table entry before reassigning the old data page.
+ * Heed the comment in pgtable-2level.h:pte_page(). :-)
+ */
+ phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
+
mmu[0].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
mmu[0].val = __pa(vdata) >> PAGE_SHIFT;
mmu[1].ptr = MMU_EXTENDED_COMMAND;
mdata = ((mmu[2].ptr & PAGE_MASK) |
((unsigned long)skb->data & ~PAGE_MASK));
- phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
-
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
}
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
- txreq.addr >> PAGE_SHIFT;
+ FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
__skb_put(skb, PKT_PROT_LEN);
memcpy(skb->data,
rx_pfn_array[nr_pfns] = virt_to_machine(skb->head) >> PAGE_SHIFT;
- /* remove this page from pseudo phys map (migration optimization) */
+ /* Remove this page from pseudo phys map before passing back to Xen. */
phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT]
- = 0x80000001;
+ = INVALID_P2M_ENTRY;
rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
rx_mcl[nr_pfns].args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
mcl->args[2] = 0;
mcl++;
(void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
-
-#if 0
- if (unlikely(rx_mcl[0].args[5] != 0))
- printk(KERN_ALERT"Hypercall0 failed %u\n",np->rx->resp_prod);
-
- if (unlikely(rx_mcl[1].args[5] != 0))
- printk(KERN_ALERT"Hypercall1 failed %u\n",np->rx->resp_prod);
-#endif
-
}
while ( (skb = __skb_dequeue(&rxq)) != NULL )
* not have MFN in our p2m table. Conversely, if the page is ours,
* then we'll have p2m(m2p(MFN))==MFN.
* If we detect a special mapping then it doesn't have a 'struct page'.
- * We force !VALID_PAGE() by returning an out-of-range pointer.
+ * We force !pfn_valid() by returning an out-of-range PFN.
+ *
+ * NB. These checks require that, for any MFN that is not in our reservation,
+ * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
+ * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
+ * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
+ *
+ * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
+ * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
+ * require. In all the cases we care about, the high bit gets shifted out
+ * (e.g., phys_to_machine()) so behaviour there is correct.
*/
-#define pte_page(_pte) \
-({ \
- unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \
- unsigned long pfn = mfn_to_pfn(mfn); \
- if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) ) \
- pfn = max_mapnr; /* special: force !VALID_PAGE() */ \
- pfn_to_page(pfn); \
-})
-
-#define pte_none(x) (!(x).pte_low)
-/* See comments above pte_page */
-/* XXXcl check pte_present because msync.c:filemap_sync_pte calls
- * without pte_present check */
+#define INVALID_P2M_ENTRY (~0UL)
+#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
#define pte_pfn(_pte) \
({ \
unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \
- unsigned long pfn = pte_present(_pte) ? mfn_to_pfn(mfn) : mfn; \
+ unsigned long pfn = mfn_to_pfn(mfn); \
if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) ) \
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
+#define pte_page(_pte) pfn_to_page(pte_pfn(_pte))
+
+#define pte_none(x) (!(x).pte_low)
+
#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pfn_pte_ma(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
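
Footnote to the NB2 comment above: a quick standalone check that the
FOREIGN_FRAME() high bit really is shifted out when a p2m entry is turned
back into a machine address, as phys_to_machine() does. (Sketch only; the
sample MFN is arbitrary, and the result relies on unsigned left-shift
overflow being well-defined in C.)

#include <assert.h>

#define PAGE_SHIFT        12
#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))

int main(void)
{
    unsigned long mfn = 0x12345;
    /* p2m_entry << PAGE_SHIFT: the top bit is shifted off the end of
     * the word, so a foreign frame yields the same machine address as
     * the plain MFN would. */
    assert((FOREIGN_FRAME(mfn) << PAGE_SHIFT) == (mfn << PAGE_SHIFT));
    return 0;
}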